# setwd("/Users/seantrott/Dropbox/UCSD/Research/NLMs/vlm-vit-num/analysis")
# Locate every per-model results file and read each one into its own tibble.
directory_path <- "../results"
csv_files <- list.files(
  path = directory_path,
  # `pattern` is a regular expression, not a shell glob. Anchor on the literal
  # ".csv" extension so names that merely contain "csv" are not picked up.
  pattern = "\\.csv$",
  full.names = TRUE
)
csv_list <- csv_files %>%
  map(~ read_csv(.))
## New names:
## Rows: 41860 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 78720 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 29120 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 47360 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 18460 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 21120 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 3380 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 8320 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 9880 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 24320 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 3380 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 32640 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 8580 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 21120 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 15080 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 37120 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 21580 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 53120 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Stack the per-file tibbles into a single data frame.
df_hf_models <- bind_rows(csv_list)

# Derive the covariates used throughout the analysis.
df_hf_models <- df_hf_models %>%
  mutate(
    # Absolute difference in numerosity between the two images of a pair.
    numerosity_diff = abs(numerosity_2 - numerosity_1),
    log_params = log10(n_params),
    # Any model whose name contains "clip" is labelled VLM; the rest are ViT.
    model_type = case_when(
      str_detect(model_name, "clip") ~ "VLM",
      TRUE ~ "ViT"
    ),
    # Shortened label: at most the first four hyphen-separated name segments.
    model_name2 = str_extract(model_name, "^(?:[^-]*-?){1,3}[^-]*")
  ) %>%
  group_by(model_name) %>%
  mutate(
    max_layer = max(layer),
    # z-scores are computed within each model (the data are grouped here).
    # scale() returns a one-column matrix; as.numeric() keeps these as plain
    # numeric columns instead of matrix columns.
    cosine_similarity_z = as.numeric(scale(cosine_similarity)),
    numerosity_diff_z = as.numeric(scale(numerosity_diff)),
    area_diff_z = as.numeric(scale(area_diff))
  ) %>%
  # Drop the grouping so later verbs do not silently operate per model.
  ungroup()

# Number of rows per model.
table(df_hf_models$model_name)
##
## clip-big-giant clip-giant
## 44100 73800
## clip-huge-14 clip-vit-base-patch32
## 89100 41860
## clip-vit-large-patch14 vit-base-patch16-224-in21k
## 58000 11700
## vit-huge-patch14-224-in21k vit-large-patch16-224-in21k
## 89100 45000
## vit-large-patch32-224-in21k
## 22500
# Cross-tabulate rows per model by same/different numerosity comparison.
table(
  df_hf_models[["model_name"]],
  df_hf_models[["numerosity_comparison_type"]]
)
##
## different same
## clip-big-giant 22050 22050
## clip-giant 36900 36900
## clip-huge-14 44550 44550
## clip-vit-base-patch32 20930 20930
## clip-vit-large-patch14 29000 29000
## vit-base-patch16-224-in21k 5850 5850
## vit-huge-patch14-224-in21k 44550 44550
## vit-large-patch16-224-in21k 22500 22500
## vit-large-patch32-224-in21k 11250 11250
# Cross-tabulate rows per model by stimulus image type.
table(
  df_hf_models[["model_name"]],
  df_hf_models[["image_type"]]
)
##
## dots rectangles
## clip-big-giant 12740 31360
## clip-giant 21320 52480
## clip-huge-14 25740 63360
## clip-vit-base-patch32 16900 24960
## clip-vit-large-patch14 26000 32000
## vit-base-patch16-224-in21k 3380 8320
## vit-huge-patch14-224-in21k 25740 63360
## vit-large-patch16-224-in21k 13000 32000
## vit-large-patch32-224-in21k 6500 16000
# Bar chart: number of rows at each value of the absolute numerosity difference.
ggplot(df_hf_models, aes(x = numerosity_diff)) +
  geom_bar(alpha = .6) +
  labs(x = "Difference in Numerosity") +
  theme_minimal() +
  theme(
    text = element_text(size = 15),
    legend.position = "bottom"
  )
# Bar chart: number of rows at each distinct value of the area difference.
ggplot(df_hf_models, aes(x = area_diff)) +
  geom_bar(alpha = .6) +
  labs(x = "Difference in Surface Area") +
  theme_minimal() +
  theme(
    text = element_text(size = 15),
    legend.position = "bottom"
  )
# Histogram of raw cosine similarity, one panel per model. The bin count is
# left at the ggplot2 default.
ggplot(df_hf_models, aes(x = cosine_similarity)) +
  geom_histogram(alpha = .6) +
  facet_wrap(~model_name) +
  labs(x = "Cosine Similarity") +
  theme_minimal() +
  theme(text = element_text(size = 15))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Mean cosine similarity and its standard error for every
# model x comparison type x layer x image type cell.
df_summary <- df_hf_models %>%
  group_by(model_name2, numerosity_comparison_type, layer, max_layer, image_type) %>%
  summarize(
    avg_similarity = mean(cosine_similarity, na.rm = TRUE),
    # sd() drops NAs, so the SE denominator must count only non-missing values;
    # n() would also count NA rows and understate the standard error.
    se_similarity = sd(cosine_similarity, na.rm = TRUE) /
      sqrt(sum(!is.na(cosine_similarity)))
  )
## `summarise()` has grouped output by 'model_name2',
## 'numerosity_comparison_type', 'layer', 'max_layer'. You can override using the
## `.groups` argument.
# Final-layer mean cosine similarity per model, split by same/different
# numerosity, with +/- 1 SE error bars; one panel per image type.
ggplot(
  data = filter(df_summary, layer == max_layer),
  mapping = aes(
    x = model_name2,
    y = avg_similarity,
    fill = numerosity_comparison_type
  )
) +
  geom_col(position = position_dodge(width = 0.5), width = .6) +
  geom_errorbar(
    aes(
      ymin = avg_similarity - se_similarity,
      ymax = avg_similarity + se_similarity
    ),
    width = 0.2,
    position = position_dodge(width = 0.5)
  ) +
  facet_wrap(~image_type) +
  # Models run along the vertical axis after the flip.
  coord_flip() +
  scale_fill_viridis(discrete = TRUE) +
  scale_y_continuous(n.breaks = 3) +
  labs(
    x = "Model",
    y = "Average Cosine Similarity",
    fill = "",
    color = ""
  ) +
  theme_minimal() +
  theme(
    axis.title = element_text(size = rel(1.2)),
    axis.text = element_text(size = rel(1.2)),
    legend.text = element_text(size = rel(1.2)),
    strip.text.x = element_text(size = rel(1.2)),
    legend.position = "bottom"
  )
# Mixed model over all layers: z-scored cosine similarity predicted by area
# difference and same/different numerosity, each interacted with layer, with
# random intercepts for both images of the pair and for the model.
m1 <- lmer(
  cosine_similarity_z ~ area_diff_z * layer + numerosity_comparison_type * layer +
    (1 | image_1) + (1 | image_2) +
    (1 | model_name),
  data = df_hf_models
)
## boundary (singular) fit: see help('isSingular')
## Warning: Model failed to converge with 1 negative eigenvalue: -3.3e+09
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## cosine_similarity_z ~ area_diff_z * layer + numerosity_comparison_type *
## layer + (1 | image_1) + (1 | image_2) + (1 | model_name)
## Data: df_hf_models
##
## REML criterion at convergence: 905586.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -15.0363 -0.3486 0.0627 0.4737 5.3165
##
## Random effects:
## Groups Name Variance Std.Dev.
## image_1 (Intercept) 3.504e-01 5.919e-01
## image_2 (Intercept) 3.351e-01 5.789e-01
## model_name (Intercept) 4.271e-31 6.536e-16
## Residual 3.821e-01 6.181e-01
## Number of obs: 475160, groups: image_1, 1974; image_2, 1845; model_name, 9
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 7.001e-01 2.165e-02 3.752e+03 32.342
## area_diff_z 7.525e-01 8.799e-03 2.999e+04 85.517
## layer -6.703e-02 1.266e-04 4.719e+05 -529.214
## numerosity_comparison_typesame 1.601e-01 1.729e-02 4.213e+03 9.261
## area_diff_z:layer 6.234e-03 8.855e-05 4.703e+05 70.401
## layer:numerosity_comparison_typesame 2.492e-02 1.784e-04 4.702e+05 139.677
## Pr(>|t|)
## (Intercept) <2e-16 ***
## area_diff_z <2e-16 ***
## layer <2e-16 ***
## numerosity_comparison_typesame <2e-16 ***
## area_diff_z:layer <2e-16 ***
## layer:numerosity_comparison_typesame <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ar_df_ layer nmrs__ ar_d_:
## area_diff_z -0.020
## layer -0.078 -0.067
## nmrsty_cmp_ -0.445 0.111 0.092
## ar_dff_z:ly 0.014 -0.349 -0.079 -0.039
## lyr:nmrst__ 0.058 -0.018 -0.707 -0.143 0.098
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Shortened model label: at most the first four hyphen-separated segments of
# `model_name`.
df_hf_models <- mutate(
  df_hf_models,
  model_name2 = str_extract(model_name, "^(?:[^-]*-?){1,3}[^-]*")
)
# Final-layer cosine similarity against numerosity difference with a linear
# fit, coloured by model type; panels are ordered via reorder() on n_params.
ggplot(
  data = filter(df_hf_models, layer == max_layer),
  mapping = aes(
    x = numerosity_diff,
    y = cosine_similarity,
    color = model_type
  )
) +
  geom_point(alpha = .5) +
  geom_smooth(method = "lm") +
  facet_wrap(~reorder(model_name2, n_params)) +
  scale_color_viridis(option = "mako", discrete = TRUE) +
  labs(
    x = "Numerosity Difference",
    y = "Cosine Similarity",
    fill = "",
    color = "Model Type"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 12),
    legend.position = "bottom"
  )
## `geom_smooth()` using formula 'y ~ x'
### Run lm by layer
# For every (layer, model) cell, regress z-scored cosine similarity on the
# z-scored numerosity and area differences. Keep each predictor's coefficient
# and standard error plus the model R^2.
results <- df_hf_models %>%
  dplyr::group_by(layer, model_name) %>%
  dplyr::summarise(
    # Fit the regression once per cell and reuse it for both the tidy
    # coefficient table and R^2, rather than fitting the same model twice.
    fit = list(
      lm(cosine_similarity_z ~ numerosity_diff_z + area_diff_z, data = dplyr::cur_data())
    ),
    model_summary = list(broom::tidy(fit[[1]])),
    r_squared = summary(fit[[1]])$r.squared
  ) %>%
  dplyr::mutate(
    numerosity_diff_z_coef = purrr::map_dbl(
      model_summary,
      ~ .x %>% dplyr::filter(term == "numerosity_diff_z") %>% dplyr::pull(estimate)
    ),
    numerosity_diff_z_se = purrr::map_dbl(
      model_summary,
      ~ .x %>% dplyr::filter(term == "numerosity_diff_z") %>% dplyr::pull(std.error)
    ),
    area_diff_z_coef = purrr::map_dbl(
      model_summary,
      ~ .x %>% dplyr::filter(term == "area_diff_z") %>% dplyr::pull(estimate)
    ),
    area_diff_z_se = purrr::map_dbl(
      model_summary,
      ~ .x %>% dplyr::filter(term == "area_diff_z") %>% dplyr::pull(std.error)
    )
  ) %>%
  # The fitted objects and tidy tables are intermediate; keep only the scalars.
  dplyr::select(
    layer, model_name,
    numerosity_diff_z_coef, numerosity_diff_z_se,
    area_diff_z_coef, area_diff_z_se,
    r_squared
  )
## `summarise()` has grouped output by 'layer'. You can override using the
## `.groups` argument.
# Per-layer numerosity coefficient for each model: dotted line for the
# estimate, ribbon for +/- 1 standard error.
results %>%
  ggplot(aes(x = layer, y = numerosity_diff_z_coef, fill = model_name)) +
  geom_line(linetype = "dotted") + # coefficient estimate per layer
  geom_ribbon(
    aes(
      ymin = numerosity_diff_z_coef - numerosity_diff_z_se,
      ymax = numerosity_diff_z_coef + numerosity_diff_z_se
    ),
    alpha = 0.5,
    color = NA
  ) + # shading for SE
  labs(
    title = "",
    x = "Layer",
    y = "Coefficient (Numerosity)",
    fill = ""
  ) +
  theme_minimal() +
  scale_x_continuous(
    limits = c(0, max(results$layer)),
    breaks = seq(0, max(results$layer), 4)
  ) +
  theme(
    text = element_text(size = 12),
    legend.position = "none"
  ) +
  # Only `fill` is mapped in this plot, so the palette must be set on the fill
  # scale; a colour scale would have no effect.
  scale_fill_viridis(option = "mako", discrete = TRUE)
# Per-layer area coefficient for each model: dotted line for the estimate,
# ribbon for +/- 1 standard error.
results %>%
  ggplot(aes(x = layer, y = area_diff_z_coef, fill = model_name)) +
  geom_line(linetype = "dotted") + # coefficient estimate per layer
  geom_ribbon(
    aes(
      ymin = area_diff_z_coef - area_diff_z_se,
      ymax = area_diff_z_coef + area_diff_z_se
    ),
    alpha = 0.5,
    color = NA
  ) + # shading for SE
  labs(
    title = "",
    x = "Layer",
    y = "Coefficient (Area)",
    fill = ""
  ) +
  theme_minimal() +
  scale_x_continuous(
    limits = c(0, max(results$layer)),
    breaks = seq(0, max(results$layer), 4)
  ) +
  theme(
    text = element_text(size = 12),
    legend.position = "none"
  ) +
  # Only `fill` is mapped in this plot, so the palette must be set on the fill
  # scale; a colour scale would have no effect.
  scale_fill_viridis(option = "mako", discrete = TRUE)
# Mixed model over all layers with continuous predictors: z-scored cosine
# similarity on area and numerosity differences, each interacted with layer.
# Random intercepts for both images, the model, and the image type; fitted
# with the bobyqa optimizer.
m1 <- lmer(
  cosine_similarity_z ~ area_diff_z * layer + numerosity_diff_z * layer +
    (1 | image_1) + (1 | image_2) + (1 | model_name) + (1 | image_type),
  data = df_hf_models,
  control = lmerControl(optimizer = "bobyqa")
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00222338 (tol = 0.002, component 1)
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: cosine_similarity_z ~ area_diff_z * layer + numerosity_diff_z *
## layer + (1 | image_1) + (1 | image_2) + (1 | model_name) +
## (1 | image_type)
## Data: df_hf_models
## Control: lmerControl(optimizer = "bobyqa")
##
## REML criterion at convergence: 809613.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -16.0285 -0.3442 0.1194 0.4822 5.4859
##
## Random effects:
## Groups Name Variance Std.Dev.
## image_1 (Intercept) 0.15688 0.3961
## image_2 (Intercept) 0.13469 0.3670
## model_name (Intercept) 0.18004 0.4243
## image_type (Intercept) 0.05501 0.2345
## Residual 0.31328 0.5597
## Number of obs: 475160, groups:
## image_1, 1974; image_2, 1845; model_name, 9; image_type, 2
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 9.291e-01 2.184e-01 2.987e+00 4.253 0.0240 *
## area_diff_z -8.657e-02 9.515e-03 5.275e+03 -9.098 <2e-16 ***
## layer -6.161e-02 8.577e-05 4.724e+05 -718.327 <2e-16 ***
## numerosity_diff_z 1.450e-02 6.493e-03 3.135e+03 2.234 0.0256 *
## area_diff_z:layer 5.074e-03 7.988e-05 4.714e+05 63.523 <2e-16 ***
## layer:numerosity_diff_z -2.041e-02 8.125e-05 4.747e+05 -251.230 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ar_df_ layer nmrs__ ar_d_:
## area_diff_z -0.012
## layer -0.005 0.000
## nmrsty_dff_ 0.005 -0.152 0.000
## ar_dff_z:ly 0.002 -0.227 0.000 0.036
## lyr:nmrst__ 0.000 -0.015 0.000 -0.162 -0.051
## optimizer (bobyqa) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00222338 (tol = 0.002, component 1)
### Layer depth ratio
# Re-express layer as relative depth within each model (layer / deepest layer),
# bucket it into ten rank-based bins with ntile(), and plot the numerosity
# coefficient (dotted line) with a +/- 1 SE ribbon against the binned depth.
results %>%
  group_by(model_name) %>%
  mutate(
    max_layer = max(layer),
    prop_layer = layer / max_layer,
    binned_prop_layer = ntile(prop_layer, 10),
    prop_binned = binned_prop_layer / 10
  ) %>%
  ggplot(aes(x = prop_binned, y = numerosity_diff_z_coef, fill = model_name)) +
  geom_line(linetype = "dotted") + # coefficient estimate
  geom_ribbon(
    aes(
      ymin = numerosity_diff_z_coef - numerosity_diff_z_se,
      ymax = numerosity_diff_z_coef + numerosity_diff_z_se
    ),
    alpha = 0.5,
    color = NA
  ) + # shading for SE
  labs(
    title = "",
    x = "Layer Depth",
    y = "Coefficient (Numerosity)",
    fill = ""
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 15),
    legend.position = "none"
  ) +
  # Only `fill` is mapped in this plot, so the palette must be set on the fill
  # scale; a colour scale would have no effect.
  scale_fill_viridis(option = "mako", discrete = TRUE)
# Same depth binning as above, but the coefficients falling in each depth bin
# are averaged per model: thick line for the mean, ribbon for mean_se.
results %>%
  group_by(model_name) %>%
  mutate(
    max_layer = max(layer),
    prop_layer = layer / max_layer,
    binned_prop_layer = ntile(prop_layer, 10),
    prop_binned = binned_prop_layer / 10
  ) %>%
  ggplot(aes(x = prop_binned, y = numerosity_diff_z_coef)) +
  stat_summary(
    aes(group = model_name, color = model_name),
    geom = "line",
    fun = mean,
    size = 2
  ) +
  stat_summary(
    aes(group = model_name, fill = model_name),
    geom = "ribbon",
    fun.data = mean_se,
    alpha = 0.2,
    color = NA
  ) +
  scale_color_viridis(option = "mako", discrete = TRUE) +
  labs(
    title = "",
    x = "Layer Depth",
    y = "Coefficient (Numerosity)",
    fill = "",
    color = ""
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 15),
    legend.position = "none"
  )
# Numerosity coefficient by relative layer depth, averaged within model type
# (VLM vs ViT): thick line for the mean, ribbon for mean_se.
results %>%
  mutate(
    model_type = case_when(
      str_detect(model_name, "clip") ~ "VLM",
      TRUE ~ "ViT"
    )
  ) %>%
  # Relative depth must be computed against each model's own deepest layer.
  # Normalising by the deepest layer in the whole model type would leave
  # shallower models unable to reach a depth of 1, and would be inconsistent
  # with the per-model depth plots.
  group_by(model_name) %>%
  mutate(
    max_layer = max(layer),
    prop_layer = layer / max_layer,
    binned_prop_layer = ntile(prop_layer, 10),
    prop_binned = binned_prop_layer / 10
  ) %>%
  ungroup() %>%
  ggplot(aes(x = prop_binned, y = numerosity_diff_z_coef)) +
  stat_summary(
    aes(group = model_type, color = model_type),
    fun = mean,
    geom = "line",
    size = 2
  ) +
  stat_summary(
    aes(group = model_type, fill = model_type),
    fun.data = mean_se,
    geom = "ribbon",
    alpha = 0.2,
    color = NA
  ) +
  theme_minimal() +
  labs(
    title = "",
    x = "Layer Depth",
    y = "Coefficient (Numerosity)",
    fill = "",
    color = ""
  ) +
  scale_color_viridis(option = "mako", discrete = TRUE) +
  theme(
    text = element_text(size = 15),
    legend.position = "bottom"
  )
# Label each row as VLM (name contains "clip") or ViT, then record each model's
# deepest layer. The result stays grouped by model_name.
df_hf_models <- df_hf_models %>%
  mutate(
    model_type = case_when(
      str_detect(model_name, "clip") == TRUE ~ "VLM",
      TRUE ~ "ViT"
    )
  ) %>%
  group_by(model_name) %>%
  mutate(max_layer = max(layer))
# Final-layer mean cosine similarity and its standard error for each
# model type x same/different numerosity cell.
df_summary <- df_hf_models %>%
  filter(layer == max_layer) %>%
  group_by(model_type, numerosity_comparison_type) %>%
  summarize(
    avg_similarity = mean(cosine_similarity, na.rm = TRUE),
    # sd() drops NAs, so the SE denominator must count only non-missing values;
    # n() would also count NA rows and understate the standard error.
    se_similarity = sd(cosine_similarity, na.rm = TRUE) /
      sqrt(sum(!is.na(cosine_similarity)))
  )
## `summarise()` has grouped output by 'model_type'. You can override using the
## `.groups` argument.
# Mean final-layer cosine similarity per model type, coloured by same/different
# numerosity, with error bars spanning +/- 2 standard errors.
ggplot(
  data = df_summary,
  mapping = aes(
    x = factor(model_type),
    y = avg_similarity,
    color = numerosity_comparison_type
  )
) +
  geom_point(position = position_dodge(width = 0.5), size = 2) +
  geom_errorbar(
    aes(
      ymin = avg_similarity - 2 * se_similarity,
      ymax = avg_similarity + 2 * se_similarity
    ),
    width = 0.2,
    position = position_dodge(width = 0.5)
  ) +
  scale_color_viridis(discrete = TRUE) +
  labs(
    x = "Model Type",
    y = "Average Cosine Similarity",
    color = ""
  ) +
  theme_minimal() +
  theme(
    axis.title = element_text(size = rel(1.2)),
    axis.text = element_text(size = rel(1.2)),
    legend.text = element_text(size = rel(1.2)),
    strip.text.x = element_text(size = rel(1.2)),
    legend.position = "bottom"
  )
### NOTE: Interaction is robust to multiverse analysis
# Full final-layer specification: area difference and same/different numerosity
# each interacted with model type, plus patch size interacted with comparison
# type. Random intercepts for both images, image type, and model.
m1 <- lmer(
  cosine_similarity ~ area_diff * model_type +
    numerosity_comparison_type * model_type +
    patch_size * numerosity_comparison_type +
    (1 | image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name),
  data = filter(df_hf_models, layer == max_layer)
)
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## cosine_similarity ~ area_diff * model_type + numerosity_comparison_type *
## model_type + patch_size * numerosity_comparison_type + (1 |
## image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name)
## Data: filter(df_hf_models, layer == max_layer)
##
## REML criterion at convergence: -58156.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.9686 -0.4350 0.0188 0.4888 5.2955
##
## Random effects:
## Groups Name Variance Std.Dev.
## image_1 (Intercept) 0.004167 0.06455
## image_2 (Intercept) 0.003527 0.05939
## model_name (Intercept) 0.001066 0.03265
## image_type (Intercept) 0.002045 0.04522
## Residual 0.001165 0.03413
## Number of obs: 17240, groups:
## image_1, 1974; image_2, 1845; model_name, 9; image_type, 2
##
## Fixed effects:
## Estimate Std. Error df
## (Intercept) 8.629e-01 4.651e-02 3.676e+00
## area_diff -2.396e-05 3.928e-06 3.214e+03
## model_typeVLM 2.281e-02 2.238e-02 6.301e+00
## numerosity_comparison_typesame 5.772e-02 2.667e-03 7.327e+03
## patch_size 1.265e-04 1.507e-03 6.005e+00
## area_diff:model_typeVLM 1.519e-05 5.857e-06 2.996e+03
## model_typeVLM:numerosity_comparison_typesame -1.252e-02 2.471e-03 1.456e+04
## numerosity_comparison_typesame:patch_size -3.842e-04 7.719e-05 1.502e+04
## t value Pr(>|t|)
## (Intercept) 18.551 9.12e-05 ***
## area_diff -6.099 1.20e-09 ***
## model_typeVLM 1.020 0.34547
## numerosity_comparison_typesame 21.643 < 2e-16 ***
## patch_size 0.084 0.93583
## area_diff:model_typeVLM 2.593 0.00955 **
## model_typeVLM:numerosity_comparison_typesame -5.066 4.11e-07 ***
## numerosity_comparison_typesame:patch_size -4.978 6.50e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ar_dff md_VLM nmrs__ ptch_s a_:_VL m_VLM:
## area_diff -0.040
## modl_typVLM -0.341 0.030
## nmrsty_cmp_ -0.037 0.161 0.027
## patch_size -0.631 -0.002 0.124 0.010
## ar_dff:_VLM 0.009 -0.483 -0.049 -0.045 0.003
## mdl_tVLM:__ 0.015 -0.082 -0.064 -0.419 0.006 0.085
## nmrsty_c_:_ 0.012 0.014 0.012 -0.426 -0.026 -0.010 -0.190
The parameter estimate for the crucial interaction (`model_type` × `numerosity_comparison_type`) remains negative even when the other covariates and interactions are excluded.
# Multiverse variant: the area_diff * model_type terms are dropped; the
# patch_size * comparison-type terms are kept.
m1 <- lmer(
  cosine_similarity ~
    numerosity_comparison_type * model_type +
    patch_size * numerosity_comparison_type +
    (1 | image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name),
  data = filter(df_hf_models, layer == max_layer)
)
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: cosine_similarity ~ numerosity_comparison_type * model_type +
## patch_size * numerosity_comparison_type + (1 | image_1) +
## (1 | image_2) + (1 | image_type) + (1 | model_name)
## Data: filter(df_hf_models, layer == max_layer)
##
## REML criterion at convergence: -58165.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.9447 -0.4347 0.0175 0.4880 5.2908
##
## Random effects:
## Groups Name Variance Std.Dev.
## image_1 (Intercept) 0.004193 0.06475
## image_2 (Intercept) 0.003534 0.05945
## model_name (Intercept) 0.001054 0.03246
## image_type (Intercept) 0.001217 0.03489
## Residual 0.001167 0.03415
## Number of obs: 17240, groups:
## image_1, 1974; image_2, 1845; model_name, 9; image_type, 2
##
## Fixed effects:
## Estimate Std. Error df
## (Intercept) 8.513e-01 4.163e-02 5.139e+00
## numerosity_comparison_typesame 6.039e-02 2.634e-03 7.638e+03
## model_typeVLM 2.657e-02 2.222e-02 6.276e+00
## patch_size 1.062e-04 1.498e-03 6.007e+00
## numerosity_comparison_typesame:model_typeVLM -1.371e-02 2.462e-03 1.449e+04
## numerosity_comparison_typesame:patch_size -3.775e-04 7.724e-05 1.508e+04
## t value Pr(>|t|)
## (Intercept) 20.449 4.03e-06 ***
## numerosity_comparison_typesame 22.929 < 2e-16 ***
## model_typeVLM 1.196 0.275
## patch_size 0.071 0.946
## numerosity_comparison_typesame:model_typeVLM -5.570 2.59e-08 ***
## numerosity_comparison_typesame:patch_size -4.887 1.03e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) nmrs__ md_VLM ptch_s n__:_V
## nmrsty_cmp_ -0.034
## modl_typVLM -0.379 0.025
## patch_size -0.701 0.011 0.124
## nmrs__:_VLM 0.014 -0.416 -0.060 0.006
## nmrsty_c_:_ 0.014 -0.434 0.011 -0.026 -0.190
# Multiverse variant: the patch_size * comparison-type terms are dropped; the
# area_diff * model_type terms are kept.
m1 <- lmer(
  cosine_similarity ~ area_diff * model_type +
    numerosity_comparison_type * model_type +
    (1 | image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name),
  data = filter(df_hf_models, layer == max_layer)
)
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## cosine_similarity ~ area_diff * model_type + numerosity_comparison_type *
## model_type + (1 | image_1) + (1 | image_2) + (1 | image_type) +
## (1 | model_name)
## Data: filter(df_hf_models, layer == max_layer)
##
## REML criterion at convergence: -58160.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.9776 -0.4352 0.0172 0.4853 5.2124
##
## Random effects:
## Groups Name Variance Std.Dev.
## image_1 (Intercept) 0.0041814 0.06466
## image_2 (Intercept) 0.0035469 0.05956
## model_name (Intercept) 0.0009131 0.03022
## image_type (Intercept) 0.0020339 0.04510
## Residual 0.0011655 0.03414
## Number of obs: 17240, groups:
## image_1, 1974; image_2, 1845; model_name, 9; image_type, 2
##
## Fixed effects:
## Estimate Std. Error df
## (Intercept) 8.642e-01 3.548e-02 1.502e+00
## area_diff -2.370e-05 3.936e-06 3.211e+03
## model_typeVLM 2.426e-02 2.060e-02 7.430e+00
## numerosity_comparison_typesame 5.207e-02 2.417e-03 4.902e+03
## area_diff:model_typeVLM 1.499e-05 5.869e-06 2.995e+03
## model_typeVLM:numerosity_comparison_typesame -1.488e-02 2.428e-03 1.488e+04
## t value Pr(>|t|)
## (Intercept) 24.357 0.00623 **
## area_diff -6.023 1.91e-09 ***
## model_typeVLM 1.178 0.27514
## numerosity_comparison_typesame 21.547 < 2e-16 ***
## area_diff:model_typeVLM 2.555 0.01067 *
## model_typeVLM:numerosity_comparison_typesame -6.128 9.14e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ar_dff md_VLM nmrs__ a_:_VL
## area_diff -0.054
## modl_typVLM -0.322 0.033
## nmrsty_cmp_ -0.046 0.185 0.039
## ar_dff:_VLM 0.015 -0.482 -0.054 -0.054
## mdl_tVLM:__ 0.024 -0.081 -0.068 -0.563 0.084
# Multiverse variant: both the area_diff and patch_size terms are dropped,
# leaving only comparison type, model type, and their interaction.
m1 <- lmer(
  cosine_similarity ~
    numerosity_comparison_type * model_type +
    (1 | image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name),
  data = filter(df_hf_models, layer == max_layer)
)
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: cosine_similarity ~ numerosity_comparison_type * model_type +
## (1 | image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name)
## Data: filter(df_hf_models, layer == max_layer)
##
## REML criterion at convergence: -58169.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.9539 -0.4359 0.0158 0.4852 5.2092
##
## Random effects:
## Groups Name Variance Std.Dev.
## image_1 (Intercept) 0.0042072 0.06486
## image_2 (Intercept) 0.0035537 0.05961
## model_name (Intercept) 0.0009029 0.03005
## image_type (Intercept) 0.0012163 0.03488
## Residual 0.0011674 0.03417
## Number of obs: 17240, groups:
## image_1, 1974; image_2, 1845; model_name, 9; image_type, 2
##
## Fixed effects:
## Estimate Std. Error df
## (Intercept) 8.525e-01 2.905e-02 1.857e+00
## numerosity_comparison_typesame 5.481e-02 2.377e-03 5.071e+03
## model_typeVLM 2.798e-02 2.045e-02 7.396e+00
## numerosity_comparison_typesame:model_typeVLM -1.602e-02 2.419e-03 1.480e+04
## t value Pr(>|t|)
## (Intercept) 29.346 0.00172 **
## numerosity_comparison_typesame 23.063 < 2e-16 ***
## model_typeVLM 1.368 0.21146
## numerosity_comparison_typesame:model_typeVLM -6.621 3.68e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) nmrs__ md_VLM
## nmrsty_cmp_ -0.045
## modl_typVLM -0.391 0.035
## nmrs__:_VLM 0.025 -0.562 -0.064
# Final-layer cosine similarity against numerosity difference with a linear
# fit, one panel (and colour) per model type.
ggplot(
  data = filter(df_hf_models, layer == max_layer),
  mapping = aes(
    x = numerosity_diff,
    y = cosine_similarity,
    color = model_type
  )
) +
  geom_point(alpha = .5) +
  geom_smooth(method = "lm") +
  facet_wrap(~model_type) +
  scale_color_viridis(option = "mako", discrete = TRUE) +
  labs(
    x = "Numerosity Difference",
    y = "Cosine Similarity",
    color = ""
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 15),
    legend.position = "bottom"
  )
## `geom_smooth()` using formula 'y ~ x'
# Full mixed model on rows where layer == max_layer: cosine similarity as a
# function of area_diff x model_type, numerosity_diff x model_type and
# patch_size x numerosity_diff, with random intercepts for image_1, image_2,
# image_type and model_name. Overwrites any previous `m1`.
m1 <- lmer(
  cosine_similarity ~ area_diff * model_type + numerosity_diff * model_type +
    patch_size * numerosity_diff +
    (1 | image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name),
  data = filter(df_hf_models, layer == max_layer)
)
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: cosine_similarity ~ area_diff * model_type + numerosity_diff *
## model_type + patch_size * numerosity_diff + (1 | image_1) +
## (1 | image_2) + (1 | image_type) + (1 | model_name)
## Data: filter(df_hf_models, layer == max_layer)
##
## REML criterion at convergence: -58801.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.0820 -0.4274 0.0196 0.4852 5.3095
##
## Random effects:
## Groups Name Variance Std.Dev.
## image_1 (Intercept) 0.003299 0.05743
## image_2 (Intercept) 0.002839 0.05328
## model_name (Intercept) 0.001060 0.03256
## image_type (Intercept) 0.001385 0.03722
## Residual 0.001158 0.03402
## Number of obs: 17240, groups:
## image_1, 1974; image_2, 1845; model_name, 9; image_type, 2
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 9.197e-01 4.270e-02 4.759e+00 21.540 6.29e-06
## area_diff -1.738e-05 3.574e-06 3.317e+03 -4.863 1.21e-06
## model_typeVLM 1.070e-02 2.224e-02 6.221e+00 0.481 0.64674
## numerosity_diff -6.628e-03 1.952e-04 7.600e+03 -33.953 < 2e-16
## patch_size -2.949e-04 1.503e-03 6.004e+00 -0.196 0.85088
## area_diff:model_typeVLM 1.395e-05 5.313e-06 2.984e+03 2.626 0.00868
## model_typeVLM:numerosity_diff 1.142e-03 1.698e-04 1.579e+04 6.725 1.82e-11
## numerosity_diff:patch_size 4.862e-05 5.771e-06 1.527e+04 8.424 < 2e-16
##
## (Intercept) ***
## area_diff ***
## model_typeVLM
## numerosity_diff ***
## patch_size
## area_diff:model_typeVLM **
## model_typeVLM:numerosity_diff ***
## numerosity_diff:patch_size ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ar_dff md_VLM nmrst_ ptch_s a_:_VL m_VLM:
## area_diff -0.032
## modl_typVLM -0.371 0.021
## numrsty_dff -0.015 -0.145 0.015
## patch_size -0.685 -0.002 0.126 0.008
## ar_dff:_VLM 0.008 -0.484 -0.038 0.015 0.002
## mdl_tyVLM:_ 0.007 0.072 -0.034 -0.427 0.003 -0.065
## nmrsty_df:_ 0.010 -0.016 0.007 -0.415 -0.018 0.010 -0.216
The parameter estimate for the crucial interaction (numerosity_diff × model_type) remains positive even when the other covariates and interactions are excluded.
# Reduced model on rows where layer == max_layer: the area_diff * model_type
# terms are commented out; numerosity_diff x model_type and
# patch_size x numerosity_diff are kept, with the same four random intercepts.
# Overwrites any previous `m1`.
m1 <- lmer(
  cosine_similarity ~
    # area_diff * model_type +
    numerosity_diff * model_type +
    patch_size * numerosity_diff +
    (1 | image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name),
  data = filter(df_hf_models, layer == max_layer)
)
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: cosine_similarity ~ numerosity_diff * model_type + patch_size *
## numerosity_diff + (1 | image_1) + (1 | image_2) + (1 | image_type) +
## (1 | model_name)
## Data: filter(df_hf_models, layer == max_layer)
##
## REML criterion at convergence: -58824.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.1102 -0.4268 0.0189 0.4854 5.3055
##
## Random effects:
## Groups Name Variance Std.Dev.
## image_1 (Intercept) 0.0032961 0.05741
## image_2 (Intercept) 0.0028395 0.05329
## model_name (Intercept) 0.0010507 0.03241
## image_type (Intercept) 0.0009065 0.03011
## Residual 0.0011593 0.03405
## Number of obs: 17240, groups:
## image_1, 1974; image_2, 1845; model_name, 9; image_type, 2
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 9.133e-01 3.965e-02 5.944e+00 23.033 4.86e-07
## numerosity_diff -6.761e-03 1.928e-04 7.940e+03 -35.064 < 2e-16
## model_typeVLM 1.318e-02 2.213e-02 6.204e+00 0.595 0.573
## patch_size -3.069e-04 1.496e-03 6.003e+00 -0.205 0.844
## numerosity_diff:model_typeVLM 1.202e-03 1.693e-04 1.575e+04 7.099 1.31e-12
## numerosity_diff:patch_size 4.817e-05 5.775e-06 1.532e+04 8.341 < 2e-16
##
## (Intercept) ***
## numerosity_diff ***
## model_typeVLM
## patch_size
## numerosity_diff:model_typeVLM ***
## numerosity_diff:patch_size ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) nmrst_ md_VLM ptch_s n_:_VL
## numrsty_dff -0.022
## modl_typVLM -0.397 0.016
## patch_size -0.735 0.008 0.126
## nmrst_:_VLM 0.010 -0.425 -0.037 0.003
## nmrsty_df:_ 0.010 -0.423 0.007 -0.018 -0.215
# Reduced model on rows where layer == max_layer: the
# patch_size * numerosity_diff terms are commented out; area_diff x model_type
# and numerosity_diff x model_type are kept, with random intercepts for
# image_1, image_2, image_type and model_name. Overwrites any previous `m1`.
m1 <- lmer(
  cosine_similarity ~ area_diff * model_type +
    numerosity_diff * model_type +
    # patch_size * numerosity_diff +
    (1 | image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name),
  data = filter(df_hf_models, layer == max_layer)
)
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: cosine_similarity ~ area_diff * model_type + numerosity_diff *
## model_type + (1 | image_1) + (1 | image_2) + (1 | image_type) +
## (1 | model_name)
## Data: filter(df_hf_models, layer == max_layer)
##
## REML criterion at convergence: -58764.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.0672 -0.4317 0.0217 0.4882 5.2036
##
## Random effects:
## Groups Name Variance Std.Dev.
## image_1 (Intercept) 0.0033305 0.05771
## image_2 (Intercept) 0.0028755 0.05362
## model_name (Intercept) 0.0009083 0.03014
## image_type (Intercept) 0.0013663 0.03696
## Residual 0.0011608 0.03407
## Number of obs: 17240, groups:
## image_1, 1974; image_2, 1845; model_name, 9; image_type, 2
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 9.150e-01 3.032e-02 1.756e+00 30.172 0.00217
## area_diff -1.692e-05 3.590e-06 3.309e+03 -4.714 2.53e-06
## model_typeVLM 9.550e-03 2.046e-02 7.317e+00 0.467 0.65426
## numerosity_diff -5.948e-03 1.783e-04 5.182e+03 -33.353 < 2e-16
## area_diff:model_typeVLM 1.371e-05 5.339e-06 2.981e+03 2.569 0.01025
## model_typeVLM:numerosity_diff 1.454e-03 1.662e-04 1.614e+04 8.748 < 2e-16
##
## (Intercept) **
## area_diff ***
## model_typeVLM
## numerosity_diff ***
## area_diff:model_typeVLM *
## model_typeVLM:numerosity_diff ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ar_dff md_VLM nmrst_ a_:_VL
## area_diff -0.046
## modl_typVLM -0.375 0.023
## numrsty_dff -0.016 -0.167 0.021
## ar_dff:_VLM 0.013 -0.484 -0.042 0.021
## mdl_tyVLM:_ 0.012 0.070 -0.036 -0.580 -0.064
# Minimal model on rows where layer == max_layer: only
# numerosity_diff x model_type remains as a fixed-effect structure (the area
# and patch-size terms are both commented out), with random intercepts for
# image_1, image_2, image_type and model_name. Overwrites any previous `m1`.
m1 <- lmer(
  cosine_similarity ~
    # area_diff * model_type +
    numerosity_diff * model_type +
    # patch_size * numerosity_diff +
    (1 | image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name),
  data = filter(df_hf_models, layer == max_layer)
)
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: cosine_similarity ~ numerosity_diff * model_type + (1 | image_1) +
## (1 | image_2) + (1 | image_type) + (1 | model_name)
## Data: filter(df_hf_models, layer == max_layer)
##
## REML criterion at convergence: -58788.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.0949 -0.4329 0.0206 0.4887 5.2007
##
## Random effects:
## Groups Name Variance Std.Dev.
## image_1 (Intercept) 0.0033267 0.05768
## image_2 (Intercept) 0.0028751 0.05362
## model_name (Intercept) 0.0009003 0.03001
## image_type (Intercept) 0.0009053 0.03009
## Residual 0.0011626 0.03410
## Number of obs: 17240, groups:
## image_1, 1974; image_2, 1845; model_name, 9; image_type, 2
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 9.084e-01 2.617e-02 2.174e+00 34.709 0.000504
## numerosity_diff -6.084e-03 1.754e-04 5.375e+03 -34.677 < 2e-16
## model_typeVLM 1.201e-02 2.035e-02 7.296e+00 0.590 0.572864
## numerosity_diff:model_typeVLM 1.509e-03 1.657e-04 1.611e+04 9.106 < 2e-16
##
## (Intercept) ***
## numerosity_diff ***
## model_typeVLM
## numerosity_diff:model_typeVLM ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) nmrst_ md_VLM
## numrsty_dff -0.029
## modl_typVLM -0.432 0.023
## nmrst_:_VLM 0.017 -0.582 -0.039
### Need better way to calculate this, maybe by item?
# Per-model "accuracy" on rows where layer == max_layer:
#   1. average cosine similarity per model, comparison type and image_1;
#   2. spread the comparison types into `same` / `different` columns;
#   3. flag an image_1 as accurate when its mean "same" similarity exceeds its
#      mean "different" similarity;
#   4. take the proportion of accurate image_1 values per model.
# Both summarise() calls pass .groups = "drop" so that pivot_wider() never
# receives a frame still grouped by its own names_from column, df_accuracy is
# returned ungrouped, and the "grouped output by ..." messages are not emitted.
df_accuracy <- df_hf_models %>%
  filter(layer == max_layer) %>%
  group_by(
    model_name2, model_type, numerosity_comparison_type, image_1,
    max_layer, n_params
  ) %>%
  summarise(mean_cos_sim = mean(cosine_similarity), .groups = "drop") %>%
  pivot_wider(
    names_from = numerosity_comparison_type,
    values_from = mean_cos_sim
  ) %>%
  mutate(
    # NA when an image_1 lacks either a `same` or a `different` mean.
    mean_diff = same - different,
    accurate = mean_diff > 0
  ) %>%
  group_by(model_name2, model_type, max_layer, n_params) %>%
  # na.rm = TRUE skips image_1 values whose comparison could not be formed.
  summarise(accuracy = mean(accurate, na.rm = TRUE), .groups = "drop")
# Accuracy against parameter count (log10 x-axis), one labelled point per row
# of df_accuracy; colour and point shape both encode model_type.
ggplot(
  data = df_accuracy,
  mapping = aes(
    x = n_params,
    y = accuracy,
    color = model_type,
    shape = model_type
  )
) +
  geom_point(size = 6, alpha = 0.9) +
  geom_text_repel(aes(label = model_name2), size = 3) +
  scale_x_log10() +
  # guides(color = "none") +
  scale_color_viridis(option = "mako", discrete = TRUE) +
  labs(
    x = "Parameters",
    y = "Accuracy",
    color = "",
    shape = ""
  ) +
  # theme_minimal() must come before theme() so the overrides below survive.
  theme_minimal() +
  theme(
    text = element_text(size = 12),
    legend.position = "bottom"
  )